In [23]:
import pandas as pd
import numpy as np
import plotly
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
In [2]:
### Load vaccination data. ####
url_vaccination = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'
owid_vaccination = pd.read_csv(url_vaccination, error_bad_lines=False, parse_dates = ['date'])
owid_vaccination.head()

# Get list of countries in the Our World In Data (owid) vaccination dataset. 
owid_vaccination_countries = list(owid_vaccination.location.unique())


### Load owid dataset that has population by country as of 2020. ###
url_population = 'https://raw.githubusercontent.com/owid/covid-19-data/master/scripts/input/un/population_2020.csv'
owid_population = pd.read_csv(url_population, error_bad_lines=False)
owid_population.head()

# Rename columns to later merge with owid vaccination dataframe.
owid_population = owid_population.rename(columns={"entity": "location"})

# Get list of countries in the Our World In Data (owin) population dataset. 
owid_population_countries = list(owid_population.location.unique())


### Load data with mapping of country to country group as defined in the Fiscal Space dataset. ###
path = '/Users/victoriadequadros/Dropbox/debt_exposure_covid/auxiliary_files/'
file_name = 'country_mapping.xlsx'
sheet = 'Fiscal Space and OWID' # sheet name or sheet number or list of sheet numbers and names
country_map = pd.read_excel(io=path+file_name, sheet_name=sheet,  usecols = "B,C")
country_map.head(5)

# Rename columns to later merge with owid dataframe.
country_map = country_map.rename(columns={"Country group": "country_group", "Country name in Our World in Data": "location"})

# Get list of countries in the Fiscal Space dataset + Taiwan.
fs_countries = list(country_map['location'])
In [3]:
# Check countries that are in OWID vaccination but are not in FS
vacc_not_in_fs = np.setdiff1d(owid_vaccination_countries, fs_countries)

# Check countries that are in the OWID population but are not in FS 
pop_not_in_fs = np.setdiff1d(owid_population_countries, fs_countries)

# Check countries that are in FS but are not in OWID vaccination
not_in_vacc = np.setdiff1d(fs_countries, owid_vaccination_countries)
In [4]:
# Merge datasets. Resulting dataset has two new columns: 'country_group' and 'population'.
# We merge using only keys from vaccination dataframe. 
owid_merged1 = owid_vaccination.merge(country_map, how='left', on='location')
owid_merged = owid_merged1.merge(owid_population[['location', 'population']], how='left', on='location')
In [5]:
# Create new dataset that sums: total vaccinations, daily vaccination, and population by date and country group. 
# Call this new dataset that is by country group simply by owid
owid = owid_merged.groupby(['country_group', 'date']).agg({'total_vaccinations':'sum', 'daily_vaccinations_raw':'sum', 'daily_vaccinations':'sum', 'population':'sum'}).reset_index()

# Create variable that is the cumulative sum of daily vaccinations.
owid['cum_daily_vacc'] = owid.groupby(['country_group'])['daily_vaccinations'].cumsum()

# Create new column with total_vaccinations per hundred
owid['total_vaccinations_per_hundred'] = (owid['cum_daily_vacc']/owid['population'])*(100)

# Drop obs before 15th december 2020. This is because only Advanced Economies had started. 
owid = owid[~(owid['date'] < '2020-12-15')]

# Prepare data for plotting. 
owid = owid.set_index('date')

# Create separate dataframes.
owid_advanced = owid[owid.country_group == "Advanced Economies"]
owid_emde = owid[owid.country_group == "EMDEs"]

# Do a 30-day moving average of total vaccivations per hundred
#owid_advanced['ma_total_vaccinations_per_hundred'] = owid_advanced['total_vaccinations_per_hundred'].rolling(7).mean()
#owid_emde['ma_total_vaccinations_per_hundred'] = owid_emde['total_vaccinations_per_hundred'].rolling(7).mean()

x = np.array(owid_advanced.index.values)
y1 = np.array(owid_advanced['total_vaccinations_per_hundred'])
y2 = np.array(owid_emde['total_vaccinations_per_hundred'])

plt.plot(x, y1, "-b", label="Advanced Economies")
plt.plot(x, y2, "-r", label="EMDEs")
plt.legend(loc="upper left")
plt.ylim(0, 140)
plt.title("Total vaccinations per hundred")
plt.savefig("total_vaccinations_per_hundred.png")
plt.show()
In [14]:
owid.sort_values(by = ['country_group', 'date', 'total_vaccinations_per_hundred'], ascending = False)
Out[14]:
country_group total_vaccinations daily_vaccinations_raw daily_vaccinations population cum_daily_vacc total_vaccinations_per_hundred
date
2021-07-19 EMDEs 2.447416e+09 14629709.0 22247770.0 4.674605e+09 2.624386e+09 56.141346
2021-07-18 EMDEs 2.059421e+09 13331190.0 24076770.0 5.196730e+09 2.602138e+09 50.072614
2021-07-17 EMDEs 2.516087e+09 21824636.0 24332350.0 5.549480e+09 2.578062e+09 46.455916
2021-07-16 EMDEs 2.453281e+09 24689460.0 23674745.0 5.586865e+09 2.553729e+09 45.709525
2021-07-15 EMDEs 2.509761e+09 27286272.0 23368678.0 5.609536e+09 2.530055e+09 45.102740
... ... ... ... ... ... ... ...
2020-12-19 Advanced Economies 1.196500e+04 721.0 86497.0 1.273833e+08 5.145020e+05 0.403901
2020-12-18 Advanced Economies 1.118400e+04 3981.0 86911.0 1.187278e+08 4.280050e+05 0.360493
2020-12-17 Advanced Economies 7.203000e+03 4179.0 86516.0 1.187278e+08 3.410940e+05 0.287291
2020-12-16 Advanced Economies 3.025000e+03 2300.0 85626.0 1.129356e+08 2.545780e+05 0.225419
2020-12-15 Advanced Economies 7.230000e+02 718.0 84835.0 1.129356e+08 1.689520e+05 0.149600

434 rows × 7 columns

In [15]:
import plotly.graph_objects as go

X = owid[owid['country_group']=='Advanced Economies'].index

fig = go.Figure()


fig.add_trace(go.Scatter(x=X, 
                         y=owid[owid['country_group']=='Advanced Economies']['total_vaccinations_per_hundred'],
                         name='Advanced'))

fig.add_trace(go.Scatter(x=X, 
                         y=owid[owid['country_group']=='EMDEs']['total_vaccinations_per_hundred'],
                         name='EMDEs'))

fig.update_layout(title='Total vaccinations per hundred',
                  template='plotly_white',
                  xaxis_title='Date',
                  yaxis_title='')

fig.show()
In [6]:
# Just to explore which countries were leading the uptick in vaccinations. 

# Create variable that is the cumulative sum of daily vaccinations.
owid_merged['cum_daily_vacc'] = owid_merged['daily_vaccinations'].cumsum()

# Create new column with total_vaccinations per hundred
owid_merged['total_vaccinations_per_hundred'] = (owid_merged['cum_daily_vacc']/owid_merged['population'])*(100)

owid_merged.head()

owid_merged = owid_merged.sort_values(by = ['location', 'date'])

owid_merged['daily_diff_vacc_per_hundred'] = owid_merged.groupby(['location'])['total_vaccinations_per_hundred'].diff()

owid_merged

owid_merged = owid_merged.sort_values(by = ['country_group', 'date', 'daily_diff_vacc_per_hundred'], ascending = False)

owid_merged[owid_merged['country_group']=='EMDEs']
Out[6]:
location iso_code date total_vaccinations people_vaccinated people_fully_vaccinated daily_vaccinations_raw daily_vaccinations total_vaccinations_per_hundred people_vaccinated_per_hundred people_fully_vaccinated_per_hundred daily_vaccinations_per_million country_group population cum_daily_vacc daily_diff_vacc_per_hundred
19843 Mauritius MUS 2021-07-19 1040903.0 608103.0 432800.0 NaN 19910.0 609319.604377 47.82 34.03 15655.0 EMDEs 1.271767e+06 7.749126e+09 1.565538
29845 Sri Lanka LKA 2021-07-19 7361893.0 5675329.0 1686564.0 114910.0 274790.0 41390.419292 26.50 7.88 12833.0 EMDEs 2.141325e+07 8.863034e+09 1.283271
19096 Malaysia MYS 2021-07-19 14772221.0 10097841.0 4674380.0 424936.0 411316.0 23934.005458 31.20 14.44 12708.0 EMDEs 3.236600e+07 7.746480e+09 1.270827
27145 Saudi Arabia SAU 2021-07-19 22870428.0 18441821.0 4428607.0 223939.0 377528.0 24492.242103 52.97 12.72 10844.0 EMDEs 3.481387e+07 8.526697e+09 1.084418
33258 Uruguay URY 2021-07-19 4543194.0 2472903.0 2070291.0 30372.0 31165.0 325489.990348 71.19 59.60 8972.0 EMDEs 3.473727e+06 1.130663e+10 0.897163
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
25755 Russia RUS 2020-12-17 NaN NaN NaN NaN 3357.0 5792.129465 NaN NaN 23.0 EMDEs 1.459345e+08 8.452713e+09 0.002300
6116 China CHN 2020-12-16 NaN NaN NaN NaN 187500.0 181.298886 NaN NaN 130.0 EMDEs 1.439324e+09 2.609478e+09 NaN
25754 Russia RUS 2020-12-16 NaN NaN NaN NaN 3357.0 5792.127164 NaN NaN 23.0 EMDEs 1.459345e+08 8.452710e+09 NaN
6115 China CHN 2020-12-15 1500000.0 NaN NaN NaN NaN NaN NaN NaN NaN EMDEs 1.439324e+09 NaN NaN
25753 Russia RUS 2020-12-15 28500.0 28500.0 NaN NaN NaN NaN 0.02 NaN NaN EMDEs 1.459345e+08 NaN NaN

20949 rows × 16 columns

In [24]:
plotly.offline.init_notebook_mode()
In [ ]: